# Description ------------------------------------------------------------------

## This document creates additional analysis objects that are versions of the
## market month data.

# Packages ---------------------------------------------------------------------
# If a package is not installed, install it first with:
# install.packages("package_name")  # the package name must be in quotes
library(dplyr)
library(stringr)

# Data -------------------------------------------------------------------------

load("data/2_clean/month_data_merged.RData")

# Derive the helper columns used by the later subsets, in a single pass:
#   Endline          - time marker for the DID on revenue data: 1 for
#                      2018_11 rows, 0 for every other month
#   treatment_status - converted to a string for ease of use later
#   zero_col         - 1 when market_fees_collected_cl_no0s is missing and 0
#                      otherwise, then set to NA for every 2017_10 row
# NOTE(review): zero_col is meant to flag markets reporting NA or 0 collected;
# only NA is tested here, so this presumably relies on zeros having already
# been recoded to NA in the `_no0s` variable - confirm upstream.
market_month <- mutate(
  market_month,
  Endline = ifelse(year_month == "2018_11", 1, 0),
  treatment_status = as.character(treatment_status),
  zero_col = ifelse(
    year_month == "2017_10",
    NA,
    ifelse(is.na(market_fees_collected_cl_no0s), 1, 0)
  )
)

# For H2 analysis, cut down to only the November rows. filter() is used rather
# than `[` with a logical index: with `[`, any row whose `month` is NA comes
# back as a row of all NAs, whereas filter() drops it.
market_month_novs <- filter(market_month, month == "November")

# Separate data frame for DIM: only the endline rows (Endline == 1, i.e. the
# 2018_11 observations)
market_month_nov18 <- filter(market_month_novs, Endline == 1)

# Add "endline minus baseline" difference columns to the endline rows.
#
# For every outcome suffix and every baseline month, a column named
# nov18_<baseline label>_<suffix> is added, holding the endline value of
# market_fees_collected_cl_no0s_<suffix> minus the same market's value in the
# baseline month.
#
# Baseline values are looked up by market (`official_name`) instead of being
# subtracted by row position, so the result does not depend on the panel
# being sorted identically within every month. Markets with no row in a
# baseline month get NA for that difference.
#
# endline:   data frame of endline rows (one per market)
# panel:     full market-month panel holding the baseline months
# baselines: named character vector; names are the labels used in the new
#            column names, values are the `year_month` codes
# suffixes:  outcome suffixes, in the order the columns should be created
# Returns `endline` with the difference columns appended.
add_baseline_diffs <- function(endline, panel, baselines, suffixes) {
  outcome_cols <- paste0("market_fees_collected_cl_no0s_", suffixes)
  missing_cols <- setdiff(outcome_cols, intersect(names(endline), names(panel)))
  if (length(missing_cols) > 0) {
    stop("Outcome column(s) not found: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # One lookup per baseline month: its rows, plus the position of each
  # endline market within those rows (NA when the market is absent)
  lookups <- lapply(baselines, function(ym) {
    # which() drops rows whose year_month is NA instead of returning NA rows
    base_rows <- panel[which(panel$year_month == ym), ]
    if (anyDuplicated(base_rows$official_name) > 0) {
      stop("More than one ", ym, " row for the same official_name; ",
           "cannot choose a baseline value.", call. = FALSE)
    }
    list(
      rows = base_rows,
      idx = match(endline$official_name, base_rows$official_name)
    )
  })

  # Outer loop over outcomes so the columns come out in the order
  # <nov17, dec17> per outcome
  for (i in seq_along(suffixes)) {
    for (label in names(lookups)) {
      lookup <- lookups[[label]]
      endline[[paste0("nov18_", label, "_", suffixes[[i]])]] <-
        endline[[outcome_cols[[i]]]] -
        lookup$rows[[outcome_cols[[i]]]][lookup$idx]
    }
  }
  endline
}

# Make data frame of just differences: the November 2018 rows plus the change
# in each outcome since November 2017 and since December 2017.
# Outcomes: `st`, then `fc` (revenue per tax collector), then `st_fc` (fee
# units per tax collector), followed by the logged version of each.
market_month_diffs <- add_baseline_diffs(
  endline = market_month_nov18,
  panel = market_month,
  baselines = c(nov17 = "2017_11", dec17 = "2017_12"),
  suffixes = c("st", "fc", "st_fc", "st_log", "fc_log", "st_fc_log")
)

# H2 robustness check: compare December 2017 to November 2018, so keep the
# rows from exactly those two months
market_month_dec17_nov18 <- filter(
  market_month,
  year_month %in% c("2017_12", "2018_11")
)

# Single-month data frames for the baseline robustness checks
market_month_nov17 <- filter(market_month, year_month == "2017_11")
market_month_dec17 <- filter(market_month, year_month == "2017_12")


# Average number of fee collectors per market, taken over all months in which
# the count is not missing (TRUE is spelled out because `T` can be reassigned).
# NOTE(review): unique(treatment_status) assumes each market has exactly one
# treatment status; a market with several would not collapse to a single row.
market_month_fc_avg <- market_month %>%
  group_by(official_name) %>%
  summarise(num_fee_collectors_cl = mean(num_fee_collectors_cl, na.rm = TRUE),
            treatment_status = unique(treatment_status))

# Save to disk -----------------------------------------------------------------
# Collect every object in the workspace whose name contains "market_month_"
# (the derived data frames; `market_month` itself has no trailing underscore
# and is therefore not matched) and write them to a single .RData file.
objs <- ls()
objs <- str_subset(objs, "market_month_")
save(file = "data/3_formatted/market_month_additional.RData", list = objs)
